import os, sys, re,csv
import requests
import datetime
cwd = os.getcwd()
sys.path.append(cwd)
from spider.pub_func import folder_exist , DownloadFile

def get_img_url(url,headers,path):
    '''
    Fetch the radar index script, extract image names and log their URLs to a CSV.

    The response body of ``url`` is searched for ``CV1_RCNT_1000...`` entries.
    Each match is turned into a full image URL and appended as a single-column
    row to ``<path>log/<today>_download_RCNT.csv``. Afterwards ``get_img`` is
    called on that CSV to download whatever is still missing.

    Args:
        url: Address of the JavaScript file that lists the radar images.
        headers: HTTP headers sent with the request.
        path: Base directory; the CSV is written to its ``log`` sub-directory.
    '''
    # Use the `path` argument rather than a module-level global so the
    # function also works when imported and called from another module.
    log_dir = os.path.join(path, 'log')
    os.makedirs(log_dir, exist_ok=True)
    file_path = os.path.join(log_dir, str(datetime.date.today()) + '_' + 'download_RCNT.csv')
    pattern = 'CV1_RCNT_1000.*'
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)
    img_list = re.findall(pattern, response.text)
    # Open the CSV once for the whole batch; 'a+' also creates the file when
    # nothing matched, so the get_img() call below always has a file to read.
    with open(file_path, 'a+', newline='', encoding='utf8') as csvfile:
        writer = csv.writer(csvfile)
        for img in img_list:
            # Keep the text before the first comma and drop its last character
            # (presumably a closing quote in the JS source -- TODO confirm).
            img_url = 'https://www.cwb.gov.tw/Data/radar_rain/' + img.split(',')[0][0:-1]
            # Wrapping in a list writes the URL as one cell; a bare string
            # would be split into one cell per character.
            writer.writerow([img_url])
    get_img(file_path)
def get_img(file_path):
    '''
    Download every image listed in the CSV that is not on disk yet.

    The first column of each row in ``file_path`` is read as an image URL and
    duplicates are removed. For each URL the target folder is derived from the
    first 8 characters of the 4th underscore-separated field of the file name
    (the date part in names like ``CV1_RCNT_1000_20200703073029.png``).
    ``Download`` is called only when the target file does not exist.

    Args:
        file_path: Path of the CSV file holding one image URL per row.
    '''
    # Read with the same encoding/newline settings the file was written with,
    # and skip blank rows, which csv.reader yields as empty lists.
    with open(file_path, 'r', newline='', encoding='utf8') as f:
        img_set = {row[0] for row in csv.reader(f) if row}
    for radar_img_url in img_set:
        img_name = str(radar_img_url).split('/')[-1]
        name_parts = img_name.split('_')
        if len(name_parts) < 4:
            # An unexpected file name must not abort the rest of the batch.
            print('跳过无法解析的文件名', radar_img_url)
            continue
        folder_name = name_parts[3][:8]
        radar_file_path = '/media/ubuntu/08f29263-3882-43db-96fd-febb6f2e2e5f1/taiwan_radar/RCNT/' + str(folder_name) + '/'
        check_img_path = radar_file_path + img_name
        if not os.path.exists(check_img_path):
            Download(radar_img_url,img_name,radar_file_path)
def Download(radar_img_url,img_name,radar_file_path):
    '''
    Download one radar image and validate the saved file.

    Failures are reported on stdout and not re-raised, so one bad image does
    not stop the caller's loop.

    Args:
        radar_img_url: URL of the image to fetch.
        img_name: File name to save the image under.
        radar_file_path: Target directory, ending with a path separator.
    '''
    # Presumably creates the target directory when missing -- see
    # spider.pub_func.folder_exist to confirm.
    folder_exist(radar_file_path)
    try:
        DownloadFile(radar_img_url,radar_file_path,img_name)
        radar_img_path = radar_file_path + img_name
        print(radar_img_path + '下载成功')
        # Removes the file again when it is too small.
        check_img(radar_img_path)
    except Exception as exc:
        # Catch Exception rather than everything so Ctrl-C / SystemExit still
        # stop the script, and include the reason in the failure message.
        print('下载失败',radar_img_url,exc)
def check_img(radar_img_path, min_size=195000):
    '''
    Delete the file when it is smaller than ``min_size`` bytes.

    Args:
        radar_img_path: Path of the downloaded image file.
        min_size: Size threshold in bytes; files strictly smaller than this
            are removed. Defaults to 195000.

    Raises:
        OSError: If the file does not exist or cannot be removed.
    '''
    if os.path.getsize(radar_img_path) < min_size:
        os.remove(radar_img_path)

if __name__ == '__main__':
    # Root directory of the radar archive; handed to get_img_url as `path`.
    base_dir = '/media/ubuntu/08f29263-3882-43db-96fd-febb6f2e2e5f1/taiwan_radar/'
    # JavaScript file whose text is searched for radar image names.
    url = 'https://www.cwb.gov.tw/Data/js/obs_img/Observe_radar_rain.js'
    # Browser-like request headers sent with the index request.
    headers = {
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
    }
    get_img_url(url, headers, base_dir)




# Example of a full radar image URL built by get_img_url:
# https://www.cwb.gov.tw/Data/radar_rain/CV1_RCNT_1000/CV1_RCNT_1000_20200703073029.png
